Emorie D Beck
Visualizing Data in ggplot2
From Wickham (2010):
A grammar of graphics is a tool that enables us to concisely describe the components of a graphic.
# Load the course's experience-sampling data from GitHub; the .RData file is
# expected to define `ipcs_data`, which every example below uses.
load(url("https://github.com/emoriebeck/psc290-data-viz-2022/raw/main/02-week2-ggplot2/01-data/ipcs_data.RData"))
# Preview the first six rows of the tibble.
print(ipcs_data, n = 6)
# A tibble: 4,222 × 70
SID Full_D…¹ afraid angry atten…² content excited goaldir guilty happy proud
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 02 2018-10… 1 2 4 4 2 5 2 3 4
2 02 2018-10… 1 1 4 3 2 5 1 3 3
3 02 2018-10… 2 1 2 3 1 2 2 3 2
4 02 2018-10… 2 2 4 3 2 4 1 3 3
5 02 2018-10… 2 1 4 4 3 4 1 3 3
6 02 2018-10… 2 1 4 4 2 4 1 3 3
# … with 4,216 more rows, 59 more variables: purposeful <dbl>,
# agreeableness_Compassion <dbl>, agreeableness_Respectfulness <dbl>,
# agreeableness_Trust <dbl>, conscientiousness_Organization <dbl>,
# conscientiousness_Productiveness <dbl>,
# conscientiousness_Responsibility <dbl>, extraversion_Assertiveness <dbl>,
# extraversion_Energy.Level <dbl>, extraversion_Sociability <dbl>,
# neuroticism_Anxiety <dbl>, neuroticism_Depression <dbl>, …
x
y
col / color
fill
shape
size
linetype
xmin / xmax
ymin / ymax
alpha
continuous
discrete
manual
ordinal
binned
date
brewer
continuous
continuous scale with our y mapping
limits: vector length 2
breaks: vector of any length
labels: numeric or character vector
geom_point()
geom_jitter()
geom_smooth()
geom_hline() / geom_vline()
geom_bar()
geom_boxplot()
geom_density()
geom_histogram()
geom_point()
Your basic scatterplot!
geom_point()
Your basic scatterplot!
geom_point()
Your basic scatterplot!
geom_jitter()
This may be too much jitter
geom_jitter()
geom_jitter()
alpha
geom_smooth()
geom_smooth() allows you to apply statistical functions to your data
method: “loess”, “lm”, “glm”, “gam”
formula: e.g., y ~ x or y ~ poly(x, 2)
se: display standard error of estimate (T / F)
aes() wrapped aesthetics or directly mapped aesthetics
geom_smooth()
Remember: it’s a LAYERED grammar of graphics, so let’s layer!
geom_smooth()
Remember: it’s a LAYERED grammar of graphics, so let’s layer!
geom_hline()/geom_vline()
geom_hline(): horizontal lines have yintercept mappings
geom_vline(): vertical lines have xintercept mappings
geom_hline()/geom_vline()
geom_hline(): horizontal lines have yintercept mappings
# Scatterplot of `happy` against `purposeful` for participant "216", with a
# dashed horizontal reference line at that participant's mean `happy` rating
# and a linear fit layered on top.
ipcs_data %>%
  filter(SID == "216") %>%
  ggplot(mapping = aes(x = purposeful, y = happy)) +
  # jitter and transparency reduce overplotting of identical ratings
  geom_jitter(width = .1, height = .1, alpha = .25) +
  geom_hline(
    # mean is computed inside aes(), so it uses the filtered layer data
    aes(yintercept = mean(happy, na.rm = TRUE))
    , linetype = "dashed"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  theme_classic() # I just hate grey backgrounds
# geom_vline(): vertical lines have xintercept mappings
# Scatterplot of `happy` against `purposeful` for participant "216", with a
# dashed vertical reference line at that participant's mean `purposeful`
# rating and a linear fit layered on top.
ipcs_data %>%
  filter(SID == "216") %>%
  ggplot(mapping = aes(x = purposeful, y = happy)) +
  # jitter and transparency reduce overplotting of identical ratings
  geom_jitter(width = .1, height = .1, alpha = .25) +
  geom_vline(
    # mean is computed inside aes(), so it uses the filtered layer data
    aes(xintercept = mean(purposeful, na.rm = TRUE))
    , linetype = "dashed"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  theme_classic() # I just hate grey backgrounds
# geom_bar()
geom_bar()
How often did our participant have an argument, interact with others, study, and feel tired?
# For participant "216", count how many times each of the items `argument`,
# `interacted`, `study` and `tired` equals 1, then draw one bar per item.
ipcs_data %>%
  filter(SID == "216") %>%
  select(SID, Full_Date, argument, interacted, study, tired) %>%
  # long format: one row per (observation, item); missing values are dropped
  pivot_longer(
    cols = argument:tired
    , names_to = "item"
    , values_to = "value"
    , values_drop_na = TRUE
  ) %>%
  group_by(item) %>%
  summarize(value = sum(value == 1)) %>%
  ggplot(aes(x = item, fill = item, y = value)) +
  # geom_col() because the bar heights are already computed above
  geom_col(color = "black") +
  theme_classic()
# geom_bar()
Were there mean-level differences in our continuous variables?
# For participant "216", compute the mean and a normal-approximation 95%
# half-width (1.96 * SD / sqrt(n)) for four momentary ratings, then plot the
# means as bars with error bars.
ipcs_data %>%
  filter(SID %in% c("216")) %>%
  select(SID, Full_Date, happy, purposeful, afraid, attentive) %>%
  # long format over every column except the identifiers; NAs are dropped so
  # mean(), sd() and n() below all see the same non-missing observations
  pivot_longer(
    cols = c(-SID, -Full_Date)
    , names_to = "item"
    , values_to = "value"
    , values_drop_na = TRUE
  ) %>%
  group_by(item) %>%
  summarize(
    mean = mean(value)
    , ci = 1.96*(sd(value)/sqrt(n()))
  ) %>%
  ggplot(aes(x = item, fill = item, y = mean)) +
  geom_col(color = "black") +
  geom_errorbar(
    aes(ymin = mean - ci, ymax = mean + ci)
    , position = position_dodge(width = .1)
    , width = .1
    , stat = "identity"
  ) +
  theme_classic()
# geom_boxplot()
geom_boxplot()
geom_boxplot()
geom_boxplot()
geom_boxplot()
# Scatterplot of `happy` against `purposeful` for participant "216" with a
# boxplot of each variable drawn in the margin at position 6, plus a linear
# fit.
ipcs_data %>%
  filter(SID == "216") %>%
  ggplot(mapping = aes(x = purposeful, y = happy)) +
  # limits run to 7 to leave room for the boxplots placed at 6;
  # breaks are only drawn at 1, 3 and 5
  scale_x_continuous(limits = c(1,7), breaks = seq(1,5,2)) +
  scale_y_continuous(limits = c(1,7), breaks = seq(1,5,2)) +
  geom_jitter(width = .1, height = .1, alpha = .25) +
  # boxplot of `happy`, placed at x = 6
  geom_boxplot(aes(
    x = 6
    , y = happy
  )) +
  # boxplot of `purposeful`, placed at y = 6
  geom_boxplot(aes(
    y = 6
    , x = purposeful
  )) +
  geom_smooth(
    method = "lm"
    , formula = y ~ x
    , se = FALSE
    , color = "blue"
  ) +
  theme_classic() # I just hate grey backgrounds
# geom_histogram() & geom_density()
geom_histogram() & geom_density()
# scale_() functions
# Reshape participant "216"'s `afraid` through `content` columns to long
# format (one row per beep and item) and print the first eight rows.
ipcs_data %>%
  filter(SID == "216") %>%
  select(SID, beep, afraid:content) %>%
  pivot_longer(
    cols = afraid:content
    , names_to = "item"
    , values_to = "value"
  ) %>%
  print(n = 8)
# A tibble: 432 × 4
SID beep item value
<chr> <int> <chr> <dbl>
1 216 1 afraid 1
2 216 1 angry 2
3 216 1 attentive 3
4 216 1 content 4
5 216 2 afraid 2
6 216 2 angry 2
7 216 2 attentive 3
8 216 2 content 4
# … with 424 more rows
In ggplot2, there are two core faceting functions
facet_grid()
facet_wrap()
facet_grid()
facet_wrap()
nrow and ncol arguments
facet_grid()
Core arguments:
rows, cols: list of variables or formula, e.g., x ~ y
scales: same x or y scale on all facets?
space: same space for unequal length x or y facets?
switch: move labels from left to right or top to bottom?
drop: drop unused factor levels
facet_wrap()
Core arguments:
facets: barequoted or one-sided formula, e.g., ~ x + y
# nrow / ncol: number of rows and columns
# scales: same x or y scale on all facets?
# switch: move labels from left to right or top to bottom?
# drop: drop unused factor levels
# dir: horizontal or vertical
# strip.position: where to put the labels
# facet_grid()
# facet_wrap()

# Time series of four momentary items for participant "216", one panel per
# item stacked in a single column with the strip labels on the right.
ipcs_data %>%
  filter(SID == "216") %>%
  select(SID, beep, afraid:content) %>%
  pivot_longer(
    cols = afraid:content
    , names_to = "item"
    , values_to = "value"
  ) %>%
  ggplot(aes(x = beep, y = value, group = item)) +
  geom_point() +
  geom_line() +
  facet_wrap(
    ~item
    , ncol = 1
    , strip.position = "right"
  ) +
  theme_classic()

# facet_wrap(): Change scale and space
# NOTE(review): the heading above names facet_wrap(), but the code below calls
# facet_grid() with `scales` and `space` — confirm which was intended.
# Same time series, one row per item, with free y scales and free panel
# heights.
ipcs_data %>%
  filter(SID == "216") %>%
  select(SID, beep, afraid:content) %>%
  pivot_longer(
    cols = afraid:content
    , names_to = "item"
    , values_to = "value"
  ) %>%
  ggplot(aes(x = beep, y = value, group = item)) +
  geom_point() +
  geom_line() +
  facet_grid(
    item ~ .
    , scales = "free_y"
    , space = "free_y"
  ) +
  theme_classic()
# Remember this?
# Scatterplot of `happy` against `purposeful` for participant "216" with a
# linear fit and its standard-error band, plus axis labels and a two-line
# title set through labs().
ipcs_data %>%
  filter(SID == "216") %>%
  ggplot(mapping = aes(x = purposeful, y = happy)) +
  geom_jitter(width = .1, height = .1, alpha = .25) +
  geom_smooth(
    method = "lm"
    , formula = y ~ x
    , se = TRUE
    , color = "blue"
  ) +
  # the title literal spans two source lines; the second line must stay
  # unindented or the extra spaces become part of the title
  labs(
    x = "Momentary Purpose (1-5)"
    , y = "Momentary Happiness (1-5)"
    , title = "Zero-Order Associations
Between Momentary Happiness and Purpose"
  ) +
  theme_classic() # I just hate grey backgrounds
# Labels also apply to other mappings like color
You can also use labels to remove axis labels
# Horizontal boxplots of four momentary ratings for participant "216".
# In labs(), `y = NULL` removes the y-axis title and `fill` sets the legend
# title.
ipcs_data %>%
  filter(SID %in% c("216")) %>%
  select(SID, Full_Date, happy, purposeful, afraid, attentive) %>%
  pivot_longer(
    cols = c(-SID, -Full_Date)
    , names_to = "item"
    , values_to = "value"
  ) %>%
  ggplot(aes(
    y = item
    , x = value
    , fill = item
  )) +
  geom_boxplot(width = .5) +
  labs(
    x = "Momentary Rating (1-5)"
    , y = NULL
    , fill = "Item"
  ) +
  theme_classic()
# theme_ into your R console, and look at the functions that pop up
# theme_classic() (what we’ve been using)
# theme_bw()
# theme_minimal() (but is there a theme_maximal?)
# theme_void

# Labelled scatterplot with a linear fit; theme() is layered after
# theme_classic() so the bold, resized title and axis text override the
# preset.
ipcs_data %>%
  filter(SID == "216") %>%
  ggplot(mapping = aes(x = purposeful, y = happy)) +
  geom_jitter(width = .1, height = .1, alpha = .25) +
  geom_smooth(
    method = "lm"
    , formula = y ~ x
    , se = TRUE
    , color = "blue"
  ) +
  labs(
    x = "Momentary Purpose (1-5)"
    , y = "Momentary Happiness (1-5)"
    , title = "Zero-Order Associations Between Momentary Happiness and Purpose"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(
      face = "bold"
      , size = rel(1.1)
      , hjust = .5 # center the title
    )
    , axis.title = element_text(
      face = "bold"
      , size = rel(1.1)
    )
    , axis.text = element_text(
      face = "bold"
      , size = rel(1.2)
    )
  )

# Bar chart of item means with normal-approximation 95% error bars
# (1.96 * SD / sqrt(n)) for participant "216"; the legend is hidden because
# the x-axis already names each item.
ipcs_data %>%
  filter(SID %in% c("216")) %>%
  select(SID, Full_Date, happy, purposeful, afraid, attentive) %>%
  pivot_longer(
    cols = c(-SID, -Full_Date)
    , names_to = "item"
    , values_to = "value"
    , values_drop_na = TRUE
  ) %>%
  group_by(item) %>%
  summarize(
    mean = mean(value)
    , ci = 1.96*(sd(value)/sqrt(n()))
  ) %>%
  ggplot(aes(x = item, fill = item, y = mean)) +
  geom_col(color = "black") +
  geom_errorbar(
    aes(ymin = mean - ci, ymax = mean + ci)
    , position = position_dodge(width = .1)
    , width = .1
    , stat = "identity"
  ) +
  labs(
    x = NULL
    , y = "Mean Momentary Rating (CI)\n[Range 1-5]"
    , title = "Descriptive Statistics of Momentary Emotion Ratings"
  ) +
  theme_classic() +
  theme(
    legend.position = "none"
    , plot.title = element_text(face = "bold", size = rel(1.1), hjust = .5)
    , axis.title = element_text(face = "bold", size = rel(1.1))
    , axis.text = element_text(face = "bold", size = rel(1.2))
  )

# Faceted time series of four momentary items for participant "216": lines
# colored by item, one row per item, legend at the bottom, and restyled facet
# strips.
ipcs_data %>%
  filter(SID == "216") %>%
  select(SID, beep, afraid:content) %>%
  pivot_longer(
    cols = afraid:content
    , names_to = "item"
    , values_to = "value"
  ) %>%
  ggplot(aes(x = beep, y = value, group = item)) +
  geom_line(aes(color = item)) +
  geom_point(size = 1) +
  facet_grid(item~.) +
  labs(
    x = "ESM Beep (#)"
    , y = "Rated Momentary Value (1-5)"
    , title = "Time Series of Four Momentary Emotion Items for Participant 216"
    , color = NULL # drop the legend title
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom"
    , legend.text = element_text(face = "bold", size = rel(1.1))
    , plot.title = element_text(face = "bold", size = rel(1.1), hjust = .5)
    , axis.title = element_text(face = "bold", size = rel(1.1))
    , axis.text = element_text(face = "bold", size = rel(1.2))
    , strip.background = element_rect(color = "black", fill = "cornflowerblue")
    , strip.text = element_text(face = "bold", size = rel(1.2), color = "white")
  )
# PSC 290 - Data Visualization